'''
base url:  https://dushu.baidu.com/pc/detail?gid=4295122774


                                                  "book_id"    "4295122774"
https://dushu.baidu.com/api/pc/getCatalog?data={%22book_id%22:%224295122774%22}  章节



cid: "1567578898"
price_status: "0"
title: "第2章 情不自禁"

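https://dushu.baidu.com/pc/reader?gid=4295122774&cid=1567578898   Detail page: this URL does not return the novel text, because the content is loaded dynamically and the JS that loads it is never triggered.
An API endpoint is needed instead.
Detail page API: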
https://dushu.baidu.com/api/pc/getChapterContent?data={%22book_id%22:%224295122774%22,%22cid%22:%224295122774|1567578898%22,%22need_bookinfo%22:1}

{"book_id":"4295122774","cid":"4295122774|1567578898","need_bookinfo":1}


ConnectionResetError: [WinError 10054] 远程主机强迫关闭了一个现有的连接。   服务器给你断开了连接，要用IP切换访问   以后上班公司会配IP代理池
 '''
import asyncio
import aiohttp
from utils.base import Spider
import requests
from motor.motor_asyncio import AsyncIOMotorClient  #  mongo 异步保存数据的库

# Shared async MongoDB handle: database "python", collection "xiaoshuo".
client = AsyncIOMotorClient('localhost', 27017)
colle = client['python']['xiaoshuo']

# Id of the book to crawl.
b_id = "4295122774"
# Request headers; the User-Agent value is supplied by the project's Spider helper.
headers = {'User-Agent': Spider.get_ua()}
print(headers)
# Catalogue (chapter list) endpoint for the book.
base_url = f'https://dushu.baidu.com/api/pc/getCatalog?data={{"book_id":"{b_id}"}}'
print(base_url)

async def download(cid, b_id, title):
    """Fetch one chapter from the chapter-content API and store it in MongoDB.

    Args:
        cid: Chapter id taken from the catalogue response.
        b_id: Book id; the chapter is addressed as ``"<b_id>|<cid>"``.
        title: Chapter title, stored next to the chapter content.

    Raises:
        KeyError: If the JSON response has no ``data -> novel -> content``
            entry.
    """
    data = f'"book_id":"{b_id}","cid":"{b_id}|{cid}","need_bookinfo":1'

    # Chapter-content endpoint; the JSON payload travels in the ``data``
    # query parameter.
    detail_url = 'https://dushu.baidu.com/api/pc/getChapterContent?data={%s}' % data
    print(detail_url)
    # NOTE: a fresh ClientSession is opened for every chapter.
    async with aiohttp.ClientSession(headers=headers) as session:
        async with session.get(detail_url) as res:
            dic = await res.json()

    # The HTTP connection is released before the database write starts.
    datas = {
        "content": dic['data']['novel']['content'],
        "title": title,
    }
    await save_data(datas)
    # Report success only once the insert has actually completed.
    print(f'{title}写入成功')

async def save_data(data):
    """Insert ``data`` into the module-level collection when it is a dict.

    Args:
        data: Document to store. Anything that is not a ``dict`` is ignored.

    Returns:
        The result of ``colle.insert_one`` for a dict, otherwise ``None``.
    """
    # Guard clause: non-dict payloads are skipped without touching the DB.
    if not isinstance(data, dict):
        return None
    outcome = await colle.insert_one(data)
    return outcome



# 获取章节地址
async def get_Cat(url):
    """Fetch the chapter catalogue and download every chapter concurrently.

    Args:
        url: Catalogue API URL; its JSON response is expected to hold the
            chapter list under ``data -> novel -> items``.

    Returns:
        None. Each chapter is handed to ``download``; a chapter whose
        download raises is reported on stdout and does not abort the others.
    """
    # ``headers`` must be passed by keyword: the second positional argument
    # of ``requests.get`` is ``params``, which would append the User-Agent to
    # the query string instead of sending it as a request header.
    # NOTE: this call is synchronous and blocks the event loop until it returns.
    res = requests.get(url, headers=headers)
    dic = res.json()

    chapters = [
        (item.get('title'), item.get('cid'))
        for item in dic['data']['novel']['items']
    ]
    tasks = [download(cid, b_id, title) for title, cid in chapters]

    # gather() accepts bare coroutine objects and an empty list, whereas
    # asyncio.wait() rejects coroutines on Python 3.11+ and raises on an
    # empty set. return_exceptions=True keeps one failing chapter from
    # cancelling the rest.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    for (title, _), outcome in zip(chapters, results):
        if isinstance(outcome, BaseException):
            print(f'{title} download failed: {outcome!r}')

if __name__ == '__main__':
    # Script entry point: drive the catalogue crawl to completion on the
    # default event loop.
    event_loop = asyncio.get_event_loop()
    crawl = get_Cat(base_url)
    event_loop.run_until_complete(crawl)